From 3fd07d7995f71150b7c6aed3ed9ee3fa4f3ef805 Mon Sep 17 00:00:00 2001
From: "kaf24@firebug.cl.cam.ac.uk"
Date: Thu, 16 Mar 2006 18:41:01 +0100
Subject: [PATCH] Add a shadow VRAM to track changes to the real VRAM.

When the guest OS was given write access to the VRAM the device model
tracked all VRAM changes by updating the entire screen on every output
loop, causing significant overhead (a CPU bound loop in a guest slows
down by about 35%) and significant mouse latency (VNC uses the same
data path for mouse events and video updates).  With the shadow VRAM
only modified pages need to be updated and the comparison of the shadow
VRAM to the real VRAM only adds ~4% overhead while eliminating the
mouse latencies.

Signed-off-by: Don Dugger
---
 tools/ioemu/hw/vga.c                | 161 +++++++++++++++++++++++++++-
 tools/ioemu/hw/vga_int.h            |   1 +
 tools/ioemu/target-i386-dm/Makefile |  11 ++-
 3 files changed, 169 insertions(+), 4 deletions(-)

diff --git a/tools/ioemu/hw/vga.c b/tools/ioemu/hw/vga.c
index b74498456c..51980ba79c 100644
--- a/tools/ioemu/hw/vga.c
+++ b/tools/ioemu/hw/vga.c
@@ -1340,6 +1340,138 @@ void vga_invalidate_scanlines(VGAState *s, int y1, int y2)
     }
 }
 
+/*
+ * Compare `n' bytes of guest VRAM at `offset' with the shadow copy, one
+ * long at a time.  On the first mismatch the rest of the range, starting
+ * with the mismatching word, is copied into the shadow.
+ *
+ * Returns 1 if the range changed or there is no shadow to compare with,
+ * 0 if it is unchanged.
+ *
+ * Declared `static inline' rather than `extern inline': under GNU89 inline
+ * semantics `extern inline' emits no out-of-line body, so a call that the
+ * compiler chooses not to inline would be left unresolved at link time.
+ */
+static inline int cmp_vram(VGAState *s, int offset, int n)
+{
+    long *vp, *sp;
+
+    if (s->vram_shadow == NULL)
+        return 1;
+    vp = (long *)(s->vram_ptr + offset);
+    sp = (long *)(s->vram_shadow + offset);
+    while ((n -= sizeof(*vp)) >= 0) {
+        if (*vp++ != *sp++) {
+            /* n counts the bytes after this word; add the word back. */
+            memcpy(sp - 1, vp - 1, n + sizeof(*vp));
+            return 1;
+        }
+    }
+    return 0;
+}
+
+#ifdef USE_SSE2
+
+#include <signal.h>
+#include <setjmp.h>
+#include <emmintrin.h>
+
+/* Non-zero while the SSE2 compare in vram_dirty() may be used. */
+int sse2_ok = 1;
+
+/* Run CPUID with `op' in EAX and return the value left in EDX. */
+static inline unsigned int cpuid_edx(unsigned int op)
+{
+    unsigned int eax, edx;
+
+    __asm__("cpuid"
+            : "=a" (eax), "=d" (edx)
+            : "0" (op)
+            : "bx", "cx");
+
+    return edx;
+}
+
+/* Where intr() resumes if the probe instruction in check_sse2() traps. */
+jmp_buf sse_jbuf;
+
+/* SIGILL handler for the probe: disable SSE2 and unwind to check_sse2(). */
+void intr(int sig)
+{
+    sse2_ok = 0;
+    longjmp(sse_jbuf, 1);
+}
+
+/* Clear `sse2_ok' unless both CPUID and a trial instruction succeed. */
+void check_sse2(void)
+{
+    void (*old_handler)(int);
+
+    /* Check 1: What does CPUID say? */
+    if ((cpuid_edx(1) & 0x4000000) == 0) {
+        sse2_ok = 0;
+        return;
+    }
+
+    /* Check 2: Can we use SSE2 in anger? */
+    old_handler = signal(SIGILL, intr);
+    if (setjmp(sse_jbuf) == 0)
+        __asm__("xorps %xmm0,%xmm0\n");
+
+    /*
+     * Restore the previous SIGILL disposition.  If `intr' stayed installed,
+     * a later SIGILL would longjmp() into this frame after it has returned.
+     */
+    if (old_handler != SIG_ERR)
+        signal(SIGILL, old_handler);
+}
+
+/*
+ * Report whether `n' bytes of VRAM at `offset' differ from the shadow.  On
+ * a mismatch the shadow is refreshed from the differing 16-byte block to
+ * the end of the range.  Returns 1 if changed or if there is no shadow,
+ * 0 otherwise; falls back to cmp_vram() when `sse2_ok' is clear.
+ *
+ * NOTE(review): the __m128i accesses need 16-byte aligned addresses; the
+ * shadow is page aligned by vga_common_init(), vram_ptr alignment should
+ * be confirmed.
+ */
+int vram_dirty(VGAState *s, int offset, int n)
+{
+    __m128i *sp, *vp;
+
+    if (s->vram_shadow == NULL)
+        return 1;
+    if (sse2_ok == 0)
+        return cmp_vram(s, offset, n);
+    vp = (__m128i *)(s->vram_ptr + offset);
+    sp = (__m128i *)(s->vram_shadow + offset);
+    while ((n -= sizeof(*vp)) >= 0) {
+        if (_mm_movemask_epi8(_mm_cmpeq_epi8(*sp, *vp)) != 0xffff) {
+            while (n >= 0) {
+                _mm_store_si128(sp++, _mm_load_si128(vp++));
+                n -= sizeof(*vp);
+            }
+            return 1;
+        }
+        sp++;
+        vp++;
+    }
+    return 0;
+}
+#else /* !USE_SSE2 */
+/* SSE2 path compiled out: always use the word-wise compare. */
+int vram_dirty(VGAState *s, int offset, int n)
+{
+    return cmp_vram(s, offset, n);
+}
+
+/* Nothing to probe when the SSE2 path is compiled out. */
+void check_sse2(void)
+{
+}
+#endif /* !USE_SSE2 */
+
 /* 
  * graphic modes
  */
@@ -1434,6 +1566,10 @@ static void vga_draw_graphic(VGAState *s, int full_update)
     printf("w=%d h=%d v=%d line_offset=%d cr[0x09]=0x%02x cr[0x17]=0x%02x linecmp=%d sr[0x01]=0x%02x\n",
            width, height, v, line_offset, s->cr[9], s->cr[0x17], s->line_compare, s->sr[0x01]);
 #endif
+    /* Flag every VRAM page that differs from the shadow as dirty. */
+    for (y = 0; y < s->vram_size; y += TARGET_PAGE_SIZE)
+        if (vram_dirty(s, y, TARGET_PAGE_SIZE))
+            cpu_physical_memory_set_dirty(s->vram_offset + y);
     addr1 = (s->start_addr * 4);
     bwidth = width * 4;
     y_start = -1;
@@ -1536,9 +1672,18 @@ static void vga_draw_blank(VGAState *s, int full_update)
 
 void vga_update_display(void)
 {
+    static unsigned int loop;   /* unsigned: wraps instead of overflowing */
     VGAState *s = vga_state;
     int full_update, graphic_mode;
 
+    /*
+     * Only update the display every other time.  The responsiveness is
+     * acceptable and it cuts down on the overhead of the VRAM compare
+     * in `vram_dirty'.
+     */
+    if (loop++ & 1)
+        return;
+
     if (s->ds->depth == 0) {
         /* nothing to do */
     } else {
@@ -1569,7 +1714,6 @@ void vga_update_display(void)
             full_update = 1;
         }
 
-        full_update = 1;
         switch(graphic_mode) {
         case GMODE_TEXT:
             vga_draw_text(s, full_update);
@@ -1874,7 +2018,20 @@ void vga_common_init(VGAState *s, DisplayState *ds, uint8_t *vga_ram_base,
 #else
     s->vram_ptr = qemu_malloc(vga_ram_size);
 #endif
-
+    check_sse2();
+    /*
+     * Over-allocate so the shadow can be rounded up to a page boundary.  If
+     * the allocation fails the shadow stays NULL and vram_dirty() reports
+     * every range as changed.
+     */
+    s->vram_shadow = qemu_malloc(vga_ram_size+TARGET_PAGE_SIZE+1);
+    if (s->vram_shadow == NULL)
+        fprintf(stderr, "Cannot allocate %d bytes for VRAM shadow, "
+                "mouse will be slow\n", vga_ram_size);
+    else
+        s->vram_shadow = (uint8_t *)(((unsigned long)s->vram_shadow
+                                      + TARGET_PAGE_SIZE - 1)
+                                     & ~((unsigned long)TARGET_PAGE_SIZE - 1));
     s->vram_offset = vga_ram_offset;
     s->vram_size = vga_ram_size;
     s->ds = ds;
diff --git a/tools/ioemu/hw/vga_int.h b/tools/ioemu/hw/vga_int.h
index ca7ec373bc..1789df51e6 100644
--- a/tools/ioemu/hw/vga_int.h
+++ b/tools/ioemu/hw/vga_int.h
@@ -76,6 +76,7 @@
 
 #define VGA_STATE_COMMON                                                \
     uint8_t *vram_ptr;                                                  \
+    uint8_t *vram_shadow;                                               \
     unsigned long vram_offset;                                          \
     unsigned int vram_size;                                             \
     uint32_t latch;                                                     \
diff --git a/tools/ioemu/target-i386-dm/Makefile b/tools/ioemu/target-i386-dm/Makefile
index 887f73bea8..3219711167 100644
--- a/tools/ioemu/target-i386-dm/Makefile
+++ b/tools/ioemu/target-i386-dm/Makefile
@@ -13,8 +13,15 @@ ifdef CONFIG_USER_ONLY
 VPATH+=:$(SRC_PATH)/linux-user
 DEFINES+=-I$(SRC_PATH)/linux-user -I$(SRC_PATH)/linux-user/$(TARGET_ARCH)
 endif
-CFLAGS+=-g -fno-strict-aliasing
-LDFLAGS=-g
+
+SSE2 := $(call test-gcc-flag,$(CC),-msse2)
+ifeq ($(SSE2),-msse2)
+CFLAGS += -DUSE_SSE2=1 -msse2
+endif
+
+CFLAGS += -g -fno-strict-aliasing $(LOCAL_CFLAGS)
+LDFLAGS = -g
+
 LIBS=
 HELPER_CFLAGS=$(CFLAGS)
 DYNGEN=../dyngen$(EXESUF)
-- 
2.30.2